* ======================================================================= 
*  File:    rhoCI.SPS .
*  Date:    3-Jun-2013 .
*  Author:  Bruce Weaver, bweaver@lakeheadu.ca .
* ======================================================================= .

* This file contains an SPSS macro to generate confidence intervals
* for rho, where rho is the parameter corresponding to Pearson's r.

* Examples of how to use the macro can be found in syntax file 
* "rhoCI_examples.SPS".

* The macro uses the following basic equations.

*  Zr = arctanh[r] <==> r = tanh[Zr]

*  d = z_alpha / sqrt[n-3]

*  t = tanh[d]

*                       tanh[Zr] +- tanh[d]     r +- t
* ci = tanh[Zr +- d] = --------------------- = --------
*                      1 +- tanh[Zr]*tanh[d]   1 +- r*t

* The rightmost term in the last equation works even when r = +1 or -1.

* SPSS has no tanh and arctanh functions.  
* HOWEVER, one can use IDF.LOGISTIC and CDF.LOGISTIC as follows:

*  arctanh[r] = .5*ln((1+r)/(1-r)) = .5*idf.logistic((1+r)/2,0,1)
*  tanh[d] = (exp(2*d)-1)/(exp(2*d)+1) = 2*cdf.logistic(2*d,0,1)-1

* SOURCES:
* http://people.math.sfu.ca/~cbm/aands/page_83.htm (see Equation 4.5.26).
* http://people.math.sfu.ca/~cbm/aands/intro.htm#001 .

* Note that the first two macro arguments, DataSetName and Vars
* are required.  The others arguments can be omitted, in which
* case they will take the default values specified below.
* Note too that the macro syntax is NOT case sensitive.

DEFINE !rhoCI
 ( DataSetName = !CHAREND('/') /
   Vars = !CHAREND('/') /
   ConfidenceLevel = !DEFAULT(95) !CHAREND('/') /
   ListWise = !DEFAULT(0) !CHAREND('/') /
   CorrMat = !DEFAULT(0)!CMDEND ).

* Suppress all output.

OMS /DESTINATION VIEWER=NO /TAG='suppressall'.

DATASET DECLARE @corrmat.
DATASET ACTIVATE !DataSetName.

* Use MATRIX=OUT option of CORRELATIONS procedure
* to send the desired correlations to another dataset.

CORRELATIONS
  /VARIABLES=!vars
  /PRINT=TWOTAIL NOSIG
  /MATRIX=OUT('@corrmat')
!IF (!ListWise !EQ 1) !THEN
  /MISSING=LISTWISE.
!ELSE
  /MISSING=PAIRWISE.
!IFEND

DATASET ACTIVATE @corrmat.
RENAME VARIABLES (varname_ = X).
SELECT if not any(rowtype_,"MEAN","STDDEV").
EXECUTE.

* Use VARSTOCASES to restructure from a square correlation matrix
* to a long file format.

************************************************************************.
* Thanks to Sacha Dubois for pointing out the "Variable Name" Index 
* option for VARSTOCASES, and for reminding that when all else fails,
* one should RTFM.
************************************************************************.

VARSTOCASES
  /ID=id
  /MAKE V FROM !Vars
  /INDEX=Y(V) 
  /KEEP=ROWTYPE_ X
  /NULL=KEEP.

* Some of the data management steps differ depending on
* whether one selected LISTWISE or PAIRWISE deletion.

***** LISTWISE *****.

!IF (!ListWise !EQ 1) !THEN

IF rowtype_ EQ "CORR" r = V.
IF rowtype_ EQ "N" n = V.
IF missing(n) n = lag(n).
FORMATS n (f8.0).
EXECUTE.

!IFEND

***** PAIRWISE *****.

!IF (!ListWise !NE 1) !THEN

IF ($casenum EQ 1) OR 
   ( rowtype_ EQ "CORR" and LAG(rowtype_) eq "N") MySorter = 1.
IF missing(MySorter) MySorter = LAG(MySorter) + 1.
FORMATS MySorter(f5.0).
EXECUTE.

SORT CASES by MySorter rowtype_ .
DO IF rowtype_ EQ "N".
+ COMPUTE r = Lag(V).
+ COMPUTE n = V.
END IF.
EXECUTE.
!IFEND

***** COMMON TO LISTWISE & PAIRWISE *****.

SELECT if not missing(r). /* Keep only 1 row per correlation.
STRING Notes (a45).
COMPUTE Notes = "".
EXECUTE.

DO IF X NE Y. /* Omit main diagonal.
+ DO IF n GT 3.
-   COMPUTE z = idf.normal(!ConfidenceLevel/200 + .5,0,1).
-   COMPUTE tanh_d = 2*cdf.logistic(2*z/sqrt(n-3),0,1) - 1.
-   COMPUTE Lower = ( r - tanh_d ) / ( 1 - r*tanh_d ) .
-   COMPUTE Upper = ( r + tanh_d ) / ( 1 + r*tanh_d ) .
+ END IF.
+ DO IF (n GT 2) and ABS(r) LT 1.
-    DO IF ABS(r) LT 1.
+      COMPUTE df = n-2.
+      COMPUTE t = r*SQRT(df/(1-r**2)).
+      COMPUTE p = cdf.t(-abs(t),df)*2. 
-    ELSE IF ABS(r) EQ 1.
+      COMPUTE p = 0.
-    END IF.
+ END IF.
+ DO IF (n LE 2).
-   COMPUTE Notes = "n < 3: p and CI not computed".
+ ELSE if (n LE 3).
-   COMPUTE Notes = "n < 4: CI not computed".
+ ELSE if (n LT 10).
-   COMPUTE Notes = "n < 10: Normal approximation is poor".
+ END IF.
END IF.

FORMATS n df (f5.0) / r Lower Upper (f6.3).

!IF (!ListWise !EQ 1) !THEN
!LET !method = "LISTWISE"
!ELSE
!LET !method = "PAIRWISE"
!IFEND
!LET !title = !CONCAT("Pearson correlations with ",!ConfidenceLevel,"% confidence intervals*")
!LET !footnote = !CONCAT("* With ",!method," deletion.")

OMSEND. /* Turn output on again.

* Suppress the "Case Processing Summary" for SUMMARIZE.

OMS
  /SELECT TABLES
  /IF COMMANDS=['Summarize'] SUBTYPES=['Case Processing Summary']
  /DESTINATION VIEWER=NO.

SUMMARIZE
  /TABLES= X Y r Lower Upper p n Notes
  /FORMAT=VALIDLIST NOCASENUM TOTAL
  /TITLE=!quote(!title)
  /FOOTNOTE=!quote(!footnote)
  /MISSING=VARIABLE
  /CELLS=NONE.

DATASET ACTIVATE !DataSetName.
DATASET CLOSE @corrmat.

* Display CORRELATIONS table for comparison with my table containing CIs.

OMSEND.

!IF (!CorrMat !EQ 1) !THEN

CORRELATIONS
  /VARIABLES=!vars
  /PRINT=TWOTAIL NOSIG
!IF (!ListWise !EQ 1) !THEN
  /MISSING=LISTWISE.
!ELSE
  /MISSING=PAIRWISE.
!IFEND
!IFEND

!ENDDEFINE.

* ======================================================================= .
